# Load packages and install them if necessary (automatic)
need = c("dplyr",  #data structuring/cleaning
         "haven",  #load data
         "here",   #setting wd
         "scales", #convenience function to recode to 0-1
         "tidyr")  #data structuring/cleaning
have = need %in% rownames(installed.packages())
if ( any(!have) ) { install.packages( need[!have] ) }
pack <- lapply(need, library, character.only = TRUE)
rm(have,need,pack)

#' Coerce a vector to numeric via its character representation.
#'
#' Converting to character first means factor input is translated from its
#' level labels rather than from its internal integer codes. Values that
#' cannot be parsed as numbers become `NA` (with R's usual coercion warning).
#'
#' @param x A vector (e.g. a factor or character vector).
#' @return A numeric vector the same length as `x`.
convert <- function(x) {
  as_text <- as.character(x)
  as.numeric(as_text)
}

# Locate the project root
# NOTE: here::here() only returns the project root path; it does not change
# the working directory, and its value is not stored or used below.
here::here()

# Load data
# Reads the ANES time-series SPSS (.sav) file with haven's read_spss().
# NOTE(review): the path is absolute and machine-specific; it must be adjusted
# before the script can run on another machine.
anes <- read_spss("D:/dv_rep/Datasets/anes_timeseries_cdf_spss_20220916.sav")

# Data cleaning
# Steps, in order:
#   1. give the ANES variables used below readable names;
#   2. drop every remaining VCF* column;
#   3. coerce all columns to plain numeric with convert();
#   4. keep only respondents with complete control variables;
#   5. set item-specific missing codes to NA, build derived variables and
#      reverse-code selected items.
anes_Dat <- anes %>%
  rename(
    partyid     = VCF0301, #pid
    year        = VCF0004, #year
    age         = VCF0101, #age
    white       = VCF0106, #white
    educ        = VCF0140, #education
    polviews    = VCF0803, #libcon
    chatt       = VCF0130, #church attendance (note: once or twice a year in ANES)
    relig       = VCF0128, #religion
    govservspen = VCF0839, #government services v. spend
    govjobinc2  = VCF0809, #government ensure jobs and income
    govhealth   = VCF0806, #government health-care
    blkaid      = VCF0830, #aid to blacks
    womnequal   = VCF0834, #equality women
    abortlaw2   = VCF0838, #abortion
    immig       = VCF0879, #immigration
    spendwp     = VCF0894, #government expenditures
    thermgays   = VCF0232, #thermometer same-sex
    spendsoc    = VCF9049, #federal spending: social security
    valuelifestyle = VCF0851, #values lifestyle
    valuemoral  = VCF0852, #values morality
    valuetrad   = VCF0853, #values tradition
    valuetol    = VCF0854  #values tolerance
  ) %>%
  select(!starts_with('VCF')) %>% #deselect all variables we don't use
  # convert from haven labelled. this will return a warning, which can be safely ignored
  # (across(everything(), ...) replaces the superseded mutate_all())
  mutate(across(everything(), convert)) %>%
  # filter missings on controls
  filter(
    !is.na(educ),
    !is.na(white),
    !is.na(relig),
    !is.na(chatt),
    !is.na(partyid),
    !is.na(age)
  ) %>%
  mutate(
    # code 8 is treated as missing on these items
    across(c(starts_with('value'), immig, spendwp), ~na_if(., 8)),
    # codes above 6 are treated as missing
    spendsoc = case_when(spendsoc > 6 ~ NA_real_, TRUE ~ spendsoc),
    # code 9 is treated as missing on these items
    across(c(starts_with('gov'), blkaid, womnequal, abortlaw2, polviews), ~na_if(., 9)),
    cohort = year - age, # approximate birth year
    # dummies: 0 when the source code exceeds the cut-off, otherwise 1 / 0 as written
    white = case_when(white > 1 ~ 0, TRUE ~ 1),
    college = case_when(educ < 5 ~ 0, TRUE ~ 1),
    protcattend = case_when(chatt < 4 & relig == 1 ~ 1, TRUE ~ 0),
    # Reverse-code: the observed maximum maps to 1 and lower values map upwards.
    # This must come after the na_if() steps above so that missing codes do not
    # define the maximum.
    # NOTE(review): thermgays gets no missing-code cleanup in this block; confirm
    # its non-substantive codes are already NA before relying on max().
    across(
      c("govservspen", "abortlaw2", "valuelifestyle", "valuetrad", "thermgays"),
      ~(max(., na.rm = TRUE) - . + 1)
    )
  )

# Derive cohort and age-group factors and fix their reference categories.
# cut() builds right-closed intervals, so the breaks 1909 and 1919 cover the
# cohort values 1910-1919; values outside the outermost breaks become NA.
anes_Dat <- anes_Dat %>%
  mutate(
    # Decade cohorts; reference category "1950-59"
    genDec = relevel(
      cut(
        cohort,
        breaks = seq(1909, 1999, by = 10),
        labels = c(
          '1910-19', '1920-29', '1930-39', '1940-49', '1950-59',
          '1960-69', '1970-79', '1980-89', '1990-99'
        )
      ),
      ref = "1950-59"
    ),
    # Generation labels; reference category "Boomer"
    genPew = relevel(
      cut(
        cohort,
        breaks = c(1909, 1927, 1945, 1964, 1980, 1996),
        labels = c('Greatest', 'Silent', 'Boomer', 'Gen X', 'Millennial')
      ),
      ref = "Boomer"
    ),
    age = as.numeric(age),
    # Age groups; reference category "30-64"
    ageGroup = relevel(
      cut(
        age,
        breaks = c(15, 21, 29, 64, 100),
        labels = c('17-21', '22-29', '30-64', '65+')
      ),
      ref = "30-64"
    ),
    # Survey year as a factor; reference category "1996"
    year = relevel(as.factor(year), ref = "1996")
  )

# Build the immigration subset, which will be used in the main models
anes_Immig <- anes_Dat %>%
  select(
    year, genPew, genDec,
    # genHalf, genAdd and genSub are not created in the visible part of this
    # script, and naming them directly makes select() abort when they are
    # absent. any_of() keeps them when present and skips them otherwise.
    any_of(c("genHalf", "genAdd", "genSub")),
    ageGroup, immig, partyid, white, college, protcattend
  ) %>%
  drop_na() %>%
  mutate(year = relevel(year, ref = "2020")) %>% #change ref. category because the default is missing
  filter(genPew != "Greatest") %>% #we drop the Greatest generation respondents from this model as 'immig' has been introduced relatively late in the ANES
  droplevels() %>% # remove factor levels left empty by drop_na()/filter()
  mutate(
    across(c(immig, partyid), ~rescale(., to = c(0, 1))),
    # Recode DV: att is a copy of the rescaled immigration item
    att = immig
  )

# Save immigration subset
# save() stores the object under its own name, so load() on this file
# recreates `anes_Immig`.
# NOTE(review): absolute, machine-specific output path.
save(anes_Immig, file = "D:/dv_rep/Datasets/anes_Immig.Rdata")

# Save the full cleaned data set
# This writes all of `anes_Dat`, not only the columns left out of the
# immigration subset.
# NOTE(review): absolute, machine-specific output path.
save(anes_Dat, file = "D:/dv_rep/Datasets/anes_Dat.Rdata")